# -*- coding: utf-8 -*-
"""
Created on Thu Apr 25 16:30:25 2019

@author: ba8rb2
"""

import os
import re
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sbs
sbs.set()


#%%
## Adjust the user name (and, if needed, the paths) so that the script can run

user = "ba8rb2"

## Set paths
path = f"C:\\Users\\{user}\\Dropbox\\Andere Aufgaben\\Artikel\\Schubi\\RAE-REF\\"

root = f"C:\\Users\\{user}\\Dropbox\\Andere Aufgaben\\Artikel\\Schubi\\Python\\"
output = f"{root}output\\"


## Directories in which the perplexity and coherence values were saved
perp = f"{output}Perplexities\\"
coh = f"{output}Coherence\\"

#%%

def dataframe_retrieval(p):
    """
    Create one dataframe by concatenating all Excel files in a directory.

    Every regular file in the directory is read with ``pandas.read_excel``.
    All digits in the file name are joined and converted to an integer
    (for example, a file named ``x_25.xlsx`` yields 25); that number is
    stored in a new column ``no_topics``. Sub-directories and files whose
    name starts with ``~$`` (the lock files Excel creates while a workbook
    is open) are skipped.

    -------------------

    Parameters
    ----------
    p : str
        Directory containing the Excel files. A trailing path separator
        is accepted but not required.

    Returns
    -------
    pandas.DataFrame
        The concatenated files, sorted ascending by ``no_topics``.

    Raises
    ------
    ValueError
        If a file name contains no digits, or if the directory contains
        no file that could be read.
    """
    dfs = []

    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order reproducible.
    for d in sorted(os.listdir(p)):
        file_path = os.path.join(p, d)

        # Skip entries that cannot be result workbooks.
        if d.startswith("~$") or os.path.isdir(file_path):
            continue

        # Keep only the digits of the file name. Validate before reading,
        # so a badly named file fails fast with a clear message.
        digits = "".join(re.findall(r"[0-9]+", d))
        if not digits:
            raise ValueError(
                "Cannot derive the number of topics from file name: " + d
            )

        df = pd.read_excel(file_path)
        df["no_topics"] = int(digits)
        dfs.append(df)

    if not dfs:
        # pd.concat would raise an unspecific ValueError on an empty list.
        raise ValueError("No Excel files found in directory: " + str(p))

    dataframe = pd.concat(dfs)
    dataframe = dataframe.sort_values(by="no_topics", ascending=True)
    return dataframe




#%%
## Step 1: Generate dataframes for perplexity and coherence

coherence_df = dataframe_retrieval(coh)
perplexities_df = dataframe_retrieval(perp)

# Keep only the rows with at most 100 topics
coherence_mask = coherence_df["no_topics"] <= 100
coherence_df = coherence_df[coherence_mask]
perplexity_mask = perplexities_df["no_topics"] <= 100
perplexities_df = perplexities_df[perplexity_mask]


## Step 2: Draw one figure with coherence (top) and perplexity (bottom)

# One x-tick per topic count present in the perplexity data
xlabs = list(perplexities_df["no_topics"])

# alternative size: figsize=[20,8]
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=[14, 7])

# Upper panel: coherence
ax1.plot(coherence_df["no_topics"], coherence_df["u_mass"], "black")
ax1.set_title("Log. UMass Coherence", size=20, fontweight="bold")
ax1.set_ylabel("Coherence-Value")

# Lower panel: perplexity; it also carries the shared x-axis labelling
ax2.plot(perplexities_df["no_topics"], perplexities_df["Perplexity"], "black")
ax2.set_title("Log. Perplexity", size=20, fontweight="bold")
ax2.set_xlabel("Number of Topics")
ax2.set_ylabel("Perplexity-Score")
ax2.set_xticks(xlabs)
ax2.tick_params(axis="x", labelrotation=90)

fig.tight_layout()
fig.savefig(
    output + "Graphics\\Other\\" + "2020_01_03_coherences_perplexity_gray.png"
)
plt.close()





















